## FISH 552, Introduction to R
## Beginning code for Lecture 6 -- Data Manipulation 2

## This code is intended to give you the datasets (made up or otherwise)
# that we will use during the in-class examples. Add to this script the
# code we go over in class plus the Hands-On exercises.

##########################
# data used with apply and tapply
#########################

# Make a 20 x 5 data matrix called subj (rows = patients, columns = years)
rownm <- paste("Patient", 1:20)
colnm <- paste("Yr", 1:5)
subj <- matrix(round(rnorm(n = 100), 3), ncol = 5,
               dimnames = list(rownm, colnm))
head(subj)

apply(subj, MARGIN = 1, mean)  # one mean per row (patient)
apply(subj, MARGIN = 2, mean)  # one mean per column (year)
# extra arguments after FUN are passed on to FUN; here they are the
# probabilities handed to quantile()
apply(subj, MARGIN = 1, quantile, probs = c(0.025, 0.975))

# create 3 vectors that correspond to fish lengths, weights, and sex
lengths <- sample(1:100, size = 20, replace = TRUE)
genders <- sample(c("Male", "Female", "Unknown"), size = 20, replace = TRUE)
weights <- sample(200:250, size = 20, replace = TRUE)

tapply(X = lengths, INDEX = genders, FUN = mean)
tapply(X = lengths, INDEX = list(genders, weights), FUN = mean)

##################################
# data used with order() and sort()
###################################

# randomizes numbers from 1 to 10 and stores in object called cards
# (the outer parentheses also print the result)
(cards <- sample(1:10))
sort(cards)
rev(sort(cards))
order(cards)
cards[order(cards)]

YY <- data.frame(ID = sample(1:10), dev = round(rnorm(10), 3))
YY[order(YY$ID), ]

# 10 normally distributed random numbers with mean 0 and std dev 1.
# Rounded to 3 decimal places
(rndNums <- round(rnorm(10), 3))

# Columns are named with `=`. Using `<-` inside data.frame() would create
# stray variables in the workspace and give the columns names built from the
# deparsed expressions, so ZZ$laws etc. would only work via partial matching.
ZZ <- data.frame(
  # Randomly draw 10 numbers ranging from 1 to 3.
  # Numbers can be used more than once.
  laws = sample(1:3, replace = TRUE, size = 10),
  # Same as above but numbers range from 2010 to 2012
  year = sample(2010:2012, replace = TRUE, size = 10),
  # 10 different state abbreviations
  state = c("WA", "OR", "CA", "VT", "NY", "RI", "FL", "UT", "AZ", "TX")
)
ZZ
# sort by laws, breaking ties by year and then by state
ZZ[order(ZZ$laws, ZZ$year, ZZ$state), ]

##################################
# Merge
###################################

station1 <- cbind(time1 = 1:100, data = rnorm(100))
# note: a common mistake when using seq is to get
# the length of the resulting vector wrong. Here it is
# length 21.
station2 <- cbind(time2 = seq(0, 100, 5),
                  category = sample(1:3, replace = TRUE, size = 21))

# all = TRUE keeps unmatched rows from both inputs (filled with NA)
stationX <- merge(station1, station2, by.x = "time1", by.y = "time2",
                  all = TRUE)

# NOTE(review): a bare function name only prints the definition of
# intersect(); presumably a placeholder for an in-class example -- confirm.
intersect

##################################
# Working with dates and times
###################################

everyday <- seq(from = as.Date("2014-01-01"), to = as.Date("2014-12-31"),
                by = "day")
# Note how you can use "day" as the interval in seq() when the data type
# is Date. Nifty.

# loads a revised do.csv data file that has a column called dateTime that
# contains the date and time for each data point in POSIX format.
# do.csv must be in the current working directory.
# NOTE(review): POSIXlt columns inside data frames are often awkward to work
# with; POSIXct may be preferable if later code does not need the list fields.
do <- read.csv("do.csv", header = TRUE,
               colClasses = c(dateTime = "POSIXlt"))